library(phyloseq); packageVersion("phyloseq")
## [1] '1.46.0'
library(ggplot2); packageVersion("ggplot2")
## [1] '3.5.1'
library(readr); packageVersion("readr")
## [1] '2.1.5'
library(tidyr); packageVersion("tidyr")
## [1] '1.3.1'
library(purrr); packageVersion("purrr")
## [1] '1.0.2'
library(furrr); packageVersion("furrr")
## Loading required package: future
## Warning: package 'future' was built under R version 4.3.3
## [1] '0.3.1'
library(dplyr); packageVersion("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] '1.1.4'
library(stringr); packageVersion("stringr")
## [1] '1.5.1'
library(forcats); packageVersion("forcats")
## [1] '1.0.0'
library(metacoder); packageVersion("metacoder")
## This is metacoder version 0.3.7 (stable)
## 
## Attaching package: 'metacoder'
## The following object is masked from 'package:ggplot2':
## 
##     map_data
## The following object is masked from 'package:phyloseq':
## 
##     filter_taxa
## [1] '0.3.7'
library(data.table); packageVersion("data.table")
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## [1] '1.15.4'
library(decontam); packageVersion("decontam")
## [1] '1.22.0'
library(Biostrings); packageVersion("Biostrings")
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
## 
##     shift
## The following object is masked from 'package:metacoder':
## 
##     reverse
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following object is masked from 'package:phyloseq':
## 
##     distance
## Loading required package: XVector
## 
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
## 
##     compact
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:metacoder':
## 
##     complement
## The following object is masked from 'package:base':
## 
##     strsplit
## [1] '2.70.3'
library(magick); packageVersion("magick")
## Warning: package 'magick' was built under R version 4.3.3
## Linking to ImageMagick 6.9.12.93
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
## [1] '2.8.4'
library(vegan); packageVersion("vegan")
## Warning: package 'vegan' was built under R version 4.3.3
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1
## [1] '2.6.6.1'
library(pdftools);packageVersion("pdftools")
## Warning: package 'pdftools' was built under R version 4.3.3
## Using poppler version 23.04.0
## [1] '3.4.1'
library(vegan); packageVersion("vegan")
## [1] '2.6.6.1'
library(grid)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
knitr::opts_knit$set(root.dir = "~/benchmark_demulticoder")

Load outputs from the demulticoder workflow

seed <- 1
set.seed(seed)

asv_matrix_rps10<-read.csv("demulticoder/data/final_asv_abundance_matrix_rps10.csv")
asv_matrix_rps10$dada2_tax <- asv_matrix_rps10$dada2_tax <- gsub("Stramenopila", "Eukaryota--100--Domain;Stramenopila", asv_matrix_rps10$dada2_tax)
asv_matrix_rps10 <- asv_matrix_rps10[, -1]
colnames(asv_matrix_rps10)[3:ncol(asv_matrix_rps10)] <- gsub("_rps10$", "", colnames(asv_matrix_rps10)[3:ncol(asv_matrix_rps10)])

asv_matrix_its<-read.csv("demulticoder/data/final_asv_abundance_matrix_its.csv")
asv_matrix_its$dada2_tax <- gsub("Fungi", "Eukaryota--100--Domain;Fungi", asv_matrix_its$dada2_tax)
asv_matrix_its <- asv_matrix_its[, -1]
colnames(asv_matrix_its)[3:ncol(asv_matrix_rps10)] <- gsub("_its$", "", colnames(asv_matrix_its)[3:ncol(asv_matrix_its)])


#Let's combine these matrices
#For easier analysis, we previously combined the two matrices, and appended domain info to each one so we can make one heat tree for combined dataset
sample_cols_its <- setdiff(names(asv_matrix_its), c("sequence", "dada2_tax"))
sample_cols_rps10 <- setdiff(names(asv_matrix_rps10), c("sequence", "dada2_tax"))

if(!all(sample_cols_its == sample_cols_rps10)) {
  stop("Sample columns do not match between ITS and RPS10 dataframes!")
}

abundance <- rbind(
 asv_matrix_its[, c("sequence", "dada2_tax", sample_cols_its)],  # ITS data
  asv_matrix_rps10[, c("sequence", "dada2_tax", sample_cols_rps10)]  # RPS10 data
)

write.csv(abundance, "demulticoder/data/final_asv_abundance_matrix_combined_demulticoder.csv")

metadata_path <- file.path("demulticoder/data/metadata.csv")
metadata <- read_csv(metadata_path)
## Rows: 174 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): sample_name, well, organism, sample_type
## dbl (3): plate, path_conc, experiment
## lgl (2): flooded, is_ambiguous
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(metadata)
## # A tibble: 174 × 9
##    sample_name plate well  organism flooded path_conc experiment sample_type
##    <chr>       <dbl> <chr> <chr>    <lgl>       <dbl>      <dbl> <chr>      
##  1 S1              1 A01   Cry      TRUE          100          1 Sample     
##  2 S10             1 B02   Cry      TRUE            1          1 Sample     
##  3 S11             1 C02   Plu      FALSE           1          1 Sample     
##  4 S12             1 D02   Plu      TRUE            1          1 Sample     
##  5 S13             1 E02   Control  TRUE            0          1 Sample     
##  6 S14             1 F02   Cin      TRUE            1          1 Sample     
##  7 S15             1 H02   Cin      FALSE         100          1 Sample     
##  8 S16             1 A03   Cry      FALSE         100          1 Sample     
##  9 S17             1 B03   Cry      TRUE          100          1 Sample     
## 10 S18             1 C03   Control  TRUE            0          1 Sample     
## # ℹ 164 more rows
## # ℹ 1 more variable: is_ambiguous <lgl>
#Fourth, let's track reads
track_reads_demulticoder_its<-read.csv("demulticoder/data/track_reads_its.csv", row.names = 1)
track_reads_demulticoder_rps10<-read.csv("demulticoder/data/track_reads_rps10.csv", row.names = 1)

Let’s collect some summary stats for each dataset

its_summary_reads<-summary(track_reads_demulticoder_its)
print(its_summary_reads)
##      input           filtered       denoisedF       denoisedR    
##  Min.   :    82   Min.   :    1   Min.   :    1   Min.   :    1  
##  1st Qu.: 23440   1st Qu.:12671   1st Qu.:12375   1st Qu.:12308  
##  Median : 37507   Median :23008   Median :22684   Median :22513  
##  Mean   : 42989   Mean   :25466   Mean   :25212   Mean   :25078  
##  3rd Qu.: 57640   3rd Qu.:31912   3rd Qu.:31614   3rd Qu.:31508  
##  Max.   :136509   Max.   :87421   Max.   :87247   Max.   :87170  
##      merged         nonchim     
##  Min.   :    0   Min.   :    0  
##  1st Qu.:11794   1st Qu.:11794  
##  Median :21770   Median :21770  
##  Mean   :24322   Mean   :24230  
##  3rd Qu.:30406   3rd Qu.:30050  
##  Max.   :86880   Max.   :86868
rps10_summary_reads<-summary(track_reads_demulticoder_rps10)
print(rps10_summary_reads)
##      input           filtered        denoisedF        denoisedR     
##  Min.   :    91   Min.   :    26   Min.   :     7   Min.   :     6  
##  1st Qu.: 37994   1st Qu.: 29199   1st Qu.: 29186   1st Qu.: 29184  
##  Median : 61070   Median : 48085   Median : 47950   Median : 48060  
##  Mean   : 73511   Mean   : 56500   Mean   : 56435   Mean   : 56433  
##  3rd Qu.: 95487   3rd Qu.: 74825   3rd Qu.: 74408   3rd Qu.: 74436  
##  Max.   :263101   Max.   :219382   Max.   :218997   Max.   :219291  
##      merged          nonchim      
##  Min.   :     0   Min.   :     0  
##  1st Qu.: 29172   1st Qu.: 29172  
##  Median : 47653   Median : 47653  
##  Mean   : 56030   Mean   : 55349  
##  3rd Qu.: 73485   3rd Qu.: 73400  
##  Max.   :218521   Max.   :217372

Let’s now retrieve stats but only on well supported taxa

# Function to extract taxonomic levels and count unique entries with bootstrap filtering
# Function to extract taxonomic levels and count unique entries
analyze_taxonomy <- function(data) {
  # Get taxonomy data
  tax_data <- data$dada2_tax
  
  # Split taxonomy string into components
  tax_splits <- strsplit(tax_data, ";")
  
  # Function to safely extract taxonomic level with proper rank checking
  extract_tax_level <- function(tax_array, rank) {
    # Find the position containing the rank
    rank_pos <- grep(rank, tax_array)
    if(length(rank_pos) > 0) {
      # Extract and clean the taxonomy name
      tax <- tax_array[rank_pos]
      # Remove the rank pattern and any leading/trailing whitespace
      cleaned_tax <- trimws(gsub(paste0("--\\d+--", rank), "", tax))
      return(cleaned_tax)
    }
    return(NA)
  }
  
  # Extract each taxonomic level with proper hierarchy checking
  families <- sapply(tax_splits, function(x) extract_tax_level(x, "Family"))
  genera <- sapply(tax_splits, function(x) extract_tax_level(x, "Genus"))
  species <- sapply(tax_splits, function(x) extract_tax_level(x, "Species"))
  
  # Create a data frame to check hierarchy consistency
  tax_df <- data.frame(
    Family = families,
    Genus = genera,
    Species = species,
    stringsAsFactors = FALSE
  )
  
  # Remove NA values before counting
  families <- families[!is.na(families)]
  genera <- genera[!is.na(genera)]
  species <- species[!is.na(species)]
  
  # Count unique entries
  unique_counts <- list(
    families = length(unique(families)),
    genera = length(unique(genera)),
    species = length(unique(species))
  )
  
  # Get prevalence with hierarchy checking
  family_prev <- table(families)
  genus_prev <- table(genera)
  species_prev <- table(species)
  
  # Sort prevalence tables
  family_prev <- sort(family_prev, decreasing = TRUE)
  genus_prev <- sort(genus_prev, decreasing = TRUE)
  species_prev <- sort(species_prev, decreasing = TRUE)
  
  # Print top 5 of each level for verification
  cat("\nTop 5 most prevalent Families:\n")
  print(head(family_prev, 5))
  cat("\nTop 5 most prevalent Genera:\n")
  print(head(genus_prev, 5))
  cat("\nTop 5 most prevalent Species:\n")
  print(head(species_prev, 5))
  
  # Get most prevalent taxa with hierarchy verification
  most_prevalent <- list(
    family = if(length(family_prev) > 0) names(family_prev)[1] else "None found",
    genus = if(length(genus_prev) > 0) names(genus_prev)[1] else "None found",
    species = if(length(species_prev) > 0) names(species_prev)[1] else "None found"
  )
  
  return(list(
    unique_counts = unique_counts,
    most_prevalent = most_prevalent,
    tax_df = tax_df  # Return full taxonomy dataframe for inspection
  ))
}

# Run analysis for both datasets
if(exists("asv_matrix_rps10")) {
  rps10_results <- analyze_taxonomy(asv_matrix_rps10)
}
## 
## Top 5 most prevalent Families:
## families
##      Pythiaceae Peronosporaceae Saprolegniaceae   Lagenidiaceae Salisapiliaceae 
##             172             125              20              10               9 
## 
## Top 5 most prevalent Genera:
## genera
##      Pythium Phytophthora  Aphanomyces  Peronospora  Saprolegnia 
##          153           96           16           16           11 
## 
## Top 5 most prevalent Species:
## species
## Phytophthora_sp.kununurra        Pythium_dissotocum Phytophthora_citrophthora 
##                        24                        20                        19 
##        Phytophthora_sojae        Pythium_irregulare 
##                        13                        13
if(exists("asv_matrix_its")) {
  its_results <- analyze_taxonomy(asv_matrix_its)
}
## 
## Top 5 most prevalent Families:
## families
##         Fungi_fam_Incertae_sedis                  Serendipitaceae 
##                              366                              200 
## Rozellomycota_fam_Incertae_sedis                  Hyaloscyphaceae 
##                              176                              157 
##                       Tuberaceae 
##                              128 
## 
## Top 5 most prevalent Genera:
## genera
##         Fungi_gen_Incertae_sedis                      Serendipita 
##                              366                              196 
## Rozellomycota_gen_Incertae_sedis                            Tuber 
##                              176                              128 
##                      Hyaloscypha 
##                               94 
## 
## Top 5 most prevalent Species:
## species
##             Fungi_sp       Serendipita_sp     Rozellomycota_sp 
##                  366                  190                  176 
##  Tuber_pseudobrumale Archaeorhizomyces_sp 
##                  128                   48

Let’s now use combined CSV files to make a few more plots-this shows how we can easily convert metadata and combined ASV matrix into a another taxmap object

First we will configure the combined taxmap object

#load reference database
rps10_database <- read_fasta("demulticoder/data/rps10_reference_db.fa")
innoc_species <- c(Cin = "Phytophthora_cinnamomi",  Plu = "Phytophthora_plurivora", Cry = "Pythium_cryptoirregulare")
innoc_regex <- paste0('(', paste0(innoc_species, collapse = '|'), ')')
innoc_seqs <- rps10_database[grepl(names(rps10_database), pattern = innoc_regex)]
innoc_seqs <- innoc_seqs[! duplicated(innoc_seqs)]
names(innoc_seqs) <- str_match(names(innoc_seqs), pattern = innoc_regex)[, 2]

iupac_match <- function(asv_chars, ref_chars) {
  map2_lgl(asv_chars, ref_chars, function(asv, ref) grepl(asv, pattern = paste0('[', IUPAC_CODE_MAP[ref], ']+')))
}

# Count number of mismatches in an alignment, allowing for IUPAC codes in reference
align_mismatch <- function(alignment) {
  asv_chars <- strsplit(as.character(alignment@pattern), '')[[1]]
  ref_chars <- strsplit(as.character(alignment@subject), '')[[1]]
  sum(! iupac_match(asv_chars, ref_chars))
}

# Align each sequence to each asv and make table of results"
aligned_data_path <- file.path("demulticoder/data", "infection_aligned_data.rds")

if (file.exists(aligned_data_path)) {
  aligned_data <- readRDS(aligned_data_path)
} else {
  aligned_data <-  future_map_dfr(seq_along(innoc_seqs), function(i) {
    aligned <- lapply(abundance$sequence, function(asv) pairwiseAlignment(pattern = asv, subject = innoc_seqs[i], type = 'global-local'))
    print(paste("Processing species:", names(innoc_seqs)[i]))
    tibble(
      species = names(innoc_seqs)[i],
      align_len = map_dbl(aligned, nchar),
      mismatch = map_dbl(aligned, align_mismatch),
      pid = (align_len - mismatch) / align_len,
      asv_seq = abundance$sequence, 
      ref_seq = innoc_seqs[i],
      alignment = aligned
    )
  })
  saveRDS(aligned_data, file = aligned_data_path)
}

# Convert ASV abundances to proportions for use with matches
abundance_prop <- abundance
abundance_prop[metadata$sample_name] <- lapply(abundance_prop[metadata$sample_name], function(counts) counts/sum(counts))

innoculum_asv_mismatch_threshold <-1
# Sum the read counts of ASVs representing the same species used for inoculation
infection_data <- aligned_data %>%
  filter(mismatch <= innoculum_asv_mismatch_threshold) %>%
  left_join(abundance_prop, by = c(asv_seq = "sequence")) %>%
  group_by(asv_seq) %>% slice_min(mismatch, with_ties = FALSE) # Only consider the best match per ASV

Prepare trimmed tables for subsequent analysis

## Update abundance matrix and metadata with results
inoculum_asv_key <- setNames(infection_data$species, infection_data$asv_seq)
abundance <- mutate(abundance, is_inoculum = sequence %in% infection_data$asv_seq, inoculum = inoculum_asv_key[sequence], .after = "dada2_tax")
write_csv(abundance, file.path('demulticoder/data', 'abundance_with_infection_data.csv'))

#Let's append metadata table with more info
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
  group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
  gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
  mutate(species = species_name_key[species]) %>%
  mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
  tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
  select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
  left_join(sample_infection_data, by = "sample_name")

# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(NA)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]])
  }
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
  if (metadata$sample_type[i] == "Mock community"  || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(sum(metadata[i, prop_cols]))
  } else {
    expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
    return(sum(metadata[i, prop_cols]))
  }
})

# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0)
  }
})

# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
  }
})

# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
## 
## FALSE  TRUE 
##    88    82
table(metadata$only_expected_innoc)
## 
## FALSE  TRUE 
##   107    63
metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
##     Cin Control     Cry     Plu 
##      26       9      16       8
# Save new metadata
write_csv(metadata, file.path('demulticoder/data', 'metadata_with_infection_data.csv'))

For the sample data, I will add columns for the proportion of reads in each sample representing each inoculum as well as a columns that indicate whether the expected inoculum was found, and if so, if it was the only inoculum found.

# Get per-sample proportions of reads
species_name_key <- c(Phytophthora_cinnamomi = "cin_prop", Phytophthora_plurivora = "plu_prop", Pythium_cryptoirregulare = "cry_prop")
sample_infection_data <- infection_data %>%
  group_by(species) %>% summarise_at(metadata$sample_name, sum) %>% # Sum per inoculum species
  tidyr::gather(key = "sample_name", value = "read_prop", !!! metadata$sample_name) %>%
  mutate(species = species_name_key[species]) %>%
  mutate(read_prop = ifelse(is.nan(read_prop), 0, read_prop)) %>%
  tidyr::spread(key = 'species', value = 'read_prop')
metadata <- metadata %>%
  select(sample_name, plate, well, organism, flooded, path_conc, experiment, sample_type, is_ambiguous) %>% # Avoid duplicating columns from previous runs
  left_join(sample_infection_data, by = "sample_name")

# Get expected inoculum proportion and non-target proportion
metadata$expected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(NA)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]])
  }
})
metadata$unexpected_innoc_prop <- map_dbl(1:nrow(metadata), function(i) {
  prop_cols <- c("cin_prop", "cry_prop", "plu_prop")
  if (metadata$sample_type[i] == "Mock community"  || metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(sum(metadata[i, prop_cols]))
  } else {
    expected_prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    prop_cols <- prop_cols[! prop_cols %in% expected_prop_col]
    return(sum(metadata[i, prop_cols]))
  }
})

# Check if expected inoculum was found
metadata$expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0)
  }
})

# Check if ONLY expected inoculum was found
metadata$only_expected_innoc <- map_lgl(1:nrow(metadata), function(i) {
  all_innoc <- sum(metadata[i, c('cin_prop', 'cry_prop', 'plu_prop')])
  if (metadata$sample_type[i] == "Mock community") {
    return(NA)
  } else if (metadata$sample_type[i] == "Negative control" || metadata$organism[i] == "Control") {
    return(all_innoc == 0)
  } else {
    prop_col <- paste0(tolower(metadata$organism[i]), '_prop')
    return(metadata[[i, prop_col]] > 0 && metadata[[i, prop_col]] == all_innoc)
  }
})

# Look at number of samples with expected inoculum
table(metadata$expected_innoc)
## 
## FALSE  TRUE 
##    88    82
table(metadata$only_expected_innoc)
## 
## FALSE  TRUE 
##   107    63
metadata$valid_inoc <- (is.na(metadata$expected_innoc_prop) | metadata$expected_innoc_prop > 0.0001) & metadata$unexpected_innoc_prop < 0.0001
map_dbl(split(metadata$valid_inoc, metadata$organism), sum)
##     Cin Control     Cry     Plu 
##      26       9      16       8
# Save new metadata
write_csv(metadata, file.path('demulticoder/data', 'metadata_with_infection_data.csv'))

Let’s convert our new matrix and associated metadata table to a taxmap object

metadata <- filter(metadata, ! is_ambiguous)
abundance <- filter(abundance, ! is_inoculum)

#Filter out mock community samples and also any mock community samples as well
metadata <- filter(metadata, sample_type != "Mock community" & sample_type != "Negative control")

obj <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj$data) <- c('abund', 'score')
obj <- transmute_obs(obj, 'score', sequence = sequence[input_index], boot = boot, rank = rank)


# For diversity calculations, we'll use proportions of read depth
# We'll set any proportion to 0 that is less than the inverse of the read count of the non-control sample with the fewest reads.
# This should account for unequal sample read depth without the randomness of rarefaction.

metadata$raw_count <- colSums(obj$data$abund[, metadata$sample_name])
lowest_count <- min(metadata$raw_count[! is.na(metadata$organism)])
lowest_count
## [1] 12676
obj$data$prop <- calc_obs_props(obj, data = 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 162 columns for 2707 observations.
obj$data$prop <- zero_low_counts(obj, data = 'prop', min_count = 1 / lowest_count, cols = metadata$sample_name)
## Zeroing 2166 of 438534 counts less than 7.88892395077311e-05.
obj$data$prop
## # A tibble: 2,707 × 163
##    taxon_id       S1      S10      S11      S12     S13      S14     S15     S16
##    <chr>       <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>
##  1 bga      0.0821   0.101    0.0414   0.0552   0       0.0366   0.168   0      
##  2 bgb      0.00867  0.0190   0.0643   0.00314  0.0220  0.00554  0.0162  2.47e-2
##  3 bgc      0.134    0        0        0        0       0.000293 0       1.67e-1
##  4 bgd      0.00482  0        0.0314   0.00815  0.00514 0        0       0      
##  5 bge      0.00584  0.00589  0.00528  0.00252  0.0379  0.00453  0.00942 2.03e-2
##  6 bgf      0.000106 0.0137   0.00153  0.00339  0.0173  0.00664  0.00864 6.26e-2
##  7 bgg      0.000158 0.000228 0.000208 0.000121 0.00139 0        0       9.56e-4
##  8 bgh      0.00322  0.0271   0.00813  0.000482 0.00190 0.000601 0.0293  2.66e-3
##  9 bgc      0        0        0.188    0        0       0.0288   0       3.37e-3
## 10 bgc      0        0        0.183    0.0323   0.242   0        0.0507  2.32e-1
## # ℹ 2,697 more rows
## # ℹ 154 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>, S20 <dbl>, S21 <dbl>,
## #   S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>,
## #   S28 <dbl>, S29 <dbl>, S3 <dbl>, S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>,
## #   S35 <dbl>, S36 <dbl>, S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>,
## #   S41 <dbl>, S42 <dbl>, S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>,
## #   S48 <dbl>, S49 <dbl>, S5 <dbl>, S50 <dbl>, S51 <dbl>, S52 <dbl>, …
obj$data$prop[metadata$sample_name] <- map(metadata$sample_name, function(id) {
  out <- obj$data$prop[[id]]
  out[is.na(out) | is.nan(out)] <- 0
  out
})

obj$data$prop
## # A tibble: 2,707 × 163
##    taxon_id       S1      S10      S11      S12     S13      S14     S15     S16
##    <chr>       <dbl>    <dbl>    <dbl>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>
##  1 bga      0.0821   0.101    0.0414   0.0552   0       0.0366   0.168   0      
##  2 bgb      0.00867  0.0190   0.0643   0.00314  0.0220  0.00554  0.0162  2.47e-2
##  3 bgc      0.134    0        0        0        0       0.000293 0       1.67e-1
##  4 bgd      0.00482  0        0.0314   0.00815  0.00514 0        0       0      
##  5 bge      0.00584  0.00589  0.00528  0.00252  0.0379  0.00453  0.00942 2.03e-2
##  6 bgf      0.000106 0.0137   0.00153  0.00339  0.0173  0.00664  0.00864 6.26e-2
##  7 bgg      0.000158 0.000228 0.000208 0.000121 0.00139 0        0       9.56e-4
##  8 bgh      0.00322  0.0271   0.00813  0.000482 0.00190 0.000601 0.0293  2.66e-3
##  9 bgc      0        0        0.188    0        0       0.0288   0       3.37e-3
## 10 bgc      0        0        0.183    0.0323   0.242   0        0.0507  2.32e-1
## # ℹ 2,697 more rows
## # ℹ 154 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>, S20 <dbl>, S21 <dbl>,
## #   S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>,
## #   S28 <dbl>, S29 <dbl>, S3 <dbl>, S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>,
## #   S35 <dbl>, S36 <dbl>, S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>,
## #   S41 <dbl>, S42 <dbl>, S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>,
## #   S48 <dbl>, S49 <dbl>, S5 <dbl>, S50 <dbl>, S51 <dbl>, S52 <dbl>, …
#Let's modify the metadata sheet
metadata <- metadata %>%
  mutate(organism = fct_relevel(ordered(organism), "Control", "Cin", "Plu", "Cry"))

Alpha diversity

abund_table <- obj$data$abund[metadata$sample_name]
metadata$richness <- vegan::specnumber(abund_table, MARGIN = 2)
metadata$shannon <- vegan::diversity(abund_table, MARGIN = 2, index = "shannon")
metadata$invsimpson <- vegan::diversity(abund_table, MARGIN = 2, index = "invsimpson")
write.csv(metadata, file.path("demulticoder", "results/alpha_diversity.csv"))
print(metadata)
## # A tibble: 162 × 21
##    sample_name plate well  organism flooded path_conc experiment sample_type
##    <chr>       <dbl> <chr> <ord>    <lgl>       <dbl>      <dbl> <chr>      
##  1 S1              1 A01   Cry      TRUE          100          1 Sample     
##  2 S10             1 B02   Cry      TRUE            1          1 Sample     
##  3 S11             1 C02   Plu      FALSE           1          1 Sample     
##  4 S12             1 D02   Plu      TRUE            1          1 Sample     
##  5 S13             1 E02   Control  TRUE            0          1 Sample     
##  6 S14             1 F02   Cin      TRUE            1          1 Sample     
##  7 S15             1 H02   Cin      FALSE         100          1 Sample     
##  8 S16             1 A03   Cry      FALSE         100          1 Sample     
##  9 S17             1 B03   Cry      TRUE          100          1 Sample     
## 10 S18             1 C03   Control  TRUE            0          1 Sample     
## # ℹ 152 more rows
## # ℹ 13 more variables: is_ambiguous <lgl>, cin_prop <dbl>, cry_prop <dbl>,
## #   plu_prop <dbl>, expected_innoc_prop <dbl>, unexpected_innoc_prop <dbl>,
## #   expected_innoc <lgl>, only_expected_innoc <lgl>, valid_inoc <lgl>,
## #   raw_count <dbl>, richness <int>, shannon <dbl>, invsimpson <dbl>
plotted_factors <- c('Organism' = 'organism', 'Flooded' = 'flooded', 'Pathogen Concentration' = 'path_conc', 'Trial' = 'experiment')

# Reformat data for plotting
alpha_plot_data <- plotted_factors %>%
  map2_dfr(names(plotted_factors), 
           function(factor, factor_name) {
             out <- metadata
             out$factor <- factor_name
             out$value <- as.character(metadata[[factor]])
             return(out)
           }) %>%
  mutate(path_conc = factor(path_conc, 
                            levels = sort(unique(path_conc)), 
                            labels = paste(sort(unique(path_conc)), 'CFU/g'), 
                            ordered = TRUE)) %>%
  filter(sample_type == 'Sample') %>%
  select(sample_name, factor, value, invsimpson) %>%
  tidyr::gather(key = "index", value = "diversity", -sample_name, -factor, -value) %>%
  mutate(value = forcats::fct_relevel(ordered(value), "Control", "Cin", "Plu", "Cry"))

# ANOVA and Tukey's HSD
anova_and_hsd <- function(x) {
  anova_result <- aov(diversity ~ value, x)
  tukey_result <- agricolae::HSD.test(anova_result, "value", group = TRUE)
  group_data <- tukey_result$groups[order(rownames(tukey_result$groups)),]
  group_key <- setNames(group_data$groups, rownames(group_data))
  group_key[as.character(x$value)]
}
alpha_plot_data$group <- unlist(map(split(alpha_plot_data, alpha_plot_data$factor)[unique(alpha_plot_data$factor)], anova_and_hsd))


alpha_subplot <- ggplot(alpha_plot_data, aes(x = value, y = diversity)) +
  geom_boxplot() +
  geom_text(aes(x = value,
                y = max(diversity) + 2,
                label = group),
            col = 'black',
            size = 5) +
  facet_grid( ~ factor, scales = "free") +
  labs(x = NULL, y = 'Diversity (Inverse Simpson)') +
  guides(color = "none") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position="bottom")

Beta diversity

library(cowplot)
set.seed(1)
prob_table <- obj$data$prop[metadata$sample_name]
nmds_plot_data <- function(prob_table) {
  metadata <- metadata[metadata$sample_name %in% colnames(prob_table), ]
  set.seed(1)
  nmds_results <- vegan::metaMDS(t(prob_table), trymax = 1000, k = 2, trace = 0)
  nmds_data <- nmds_results$points %>%
    as_tibble() %>%
    bind_cols(metadata)
  names(nmds_data)[1:2] <- paste0("NMDS", 1:2)
  return(nmds_data)
}

nmds_data <- nmds_plot_data(prob_table[! is.na(metadata$organism) & metadata$valid_inoc])

nmds_factors <- c(Flooded = 'flooded', Organism = 'organism', 'Pathogen CFU/g' = 'path_conc', 'Trial' = 'experiment')

make_one_plot <- function(factor, name) {
  nmds_data %>%
    mutate(factor = as.character(nmds_data[[factor]]),
           NMDS1 = scales::rescale(NMDS1),
           NMDS2 = scales::rescale(NMDS2)) %>%
    mutate(factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu", "Cry")) %>%
    ggplot(aes_string(x = "NMDS1", y = "NMDS2", color = "factor", label = "sample_name")) +
    geom_point() +
    coord_fixed() +
    viridis::scale_color_viridis(discrete = TRUE, end = .9) +
    labs(color = NULL, x = "NMDS1", y = "NMDS2") +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.text = element_text(size = 10),
          axis.title = element_text(size = 12),
          axis.ticks = element_line(color = "black"),
          plot.margin = unit(rep(0.04, 4), "cm"),
          legend.position = "bottom",
          legend.text = element_text(size = 10),
          legend.key.height = unit(0.5, 'cm'),
          legend.key.width = unit(0.2, 'cm'))
  
}

nmds_subplots <- map2(nmds_factors, names(nmds_factors), make_one_plot)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
##   "Cry")`.
## Caused by warning:
## ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
##   "Cry")`.
## Caused by warning:
## ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
## There was 1 warning in `mutate()`.
## ℹ In argument: `factor = fct_relevel(ordered(factor), "Control", "Cin", "Plu",
##   "Cry")`.
## Caused by warning:
## ! 4 unknown levels in `f`: Control, Cin, Plu, and Cry
nmds_plot <- ggpubr::ggarrange(plotlist = c(list(ggplot() + theme_void()), nmds_subplots),
                       nrow = 1, widths = c(0.15, 1, 1, 1, 1))

ggsave(nmds_plot, path = "demulticoder/figures", filename = "nmds.pdf", 
       width = 7, height = 8)
print(nmds_plot)

combined_div_plot <- plot_grid(alpha_subplot, nmds_plot, ncol = 1, labels = c('A', 'B'),
                               rel_heights = c(1, 0.9))
combined_div_plot

ggsave(combined_div_plot, path = "demulticoder/figures", filename = "diversity.pdf", 
       width = 13, height = 8, bg = "#FFFFFF")

ggsave(combined_div_plot, path = "demulticoder/figures", filename = "diversity.svg", 
       width = 13, height = 8, bg = "#FFFFFF")

### Export coordinates
write.csv(nmds_data, file.path("demulticoder", "results/compiled_stats.csv"))

Let’s do associated PERMANOVA analyses to better understand significance of differences in diversity between the 3 factors

dist_matrix <- vegdist(t(prob_table), method = "bray")

adonis2_full <- adonis2(dist_matrix ~ organism + flooded + path_conc + experiment, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_full)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ organism + flooded + path_conc + experiment, data = metadata, permutations = 999, method = "bray")
##             Df SumOfSqs      R2      F Pr(>F)    
## organism     3    1.254 0.02384 1.3900  0.048 *  
## flooded      1    1.848 0.03514 6.1469  0.001 ***
## path_conc    1    0.276 0.00524 0.9165  0.545    
## experiment   1    2.614 0.04971 8.6953  0.001 ***
## Residual   155   46.598 0.88608                  
## Total      161   52.590 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2_org <- adonis2(dist_matrix ~ organism, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_org)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ organism, data = metadata, permutations = 999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)
## organism   3    1.254 0.02384 1.2861  0.101
## Residual 158   51.336 0.97616              
## Total    161   52.590 1.00000
adonis2_flooded <- adonis2(dist_matrix ~ flooded, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_flooded)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ flooded, data = metadata, permutations = 999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## flooded    1    1.849 0.03515 5.8294  0.001 ***
## Residual 160   50.741 0.96485                  
## Total    161   52.590 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2_path_conc <- adonis2(dist_matrix ~ path_conc, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_path_conc)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ path_conc, data = metadata, permutations = 999, method = "bray")
##            Df SumOfSqs      R2      F Pr(>F)
## path_conc   1    0.308 0.00586 0.9431  0.466
## Residual  160   52.281 0.99414              
## Total     161   52.590 1.00000
adonis2_experiment <- adonis2(dist_matrix ~ experiment, 
                         data = metadata, 
                         permutations = 999,
                         method = "bray")

print(adonis2_experiment)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ experiment, data = metadata, permutations = 999, method = "bray")
##             Df SumOfSqs      R2      F Pr(>F)    
## experiment   1    2.613 0.04969 8.3655  0.001 ***
## Residual   160   49.977 0.95031                  
## Total      161   52.590 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Let’s make new taxmap object in preparation to make heattree examining abundance and diversity of taxa in at least 5 samples

#Make sure our factors are input properly as factors
metadata$path_conc <- as.factor(metadata$path_conc)
metadata$path_conc<- factor(metadata$path_conc, levels = c("0", "1", "100"))

metadata$organism <- as.factor(metadata$organism)
metadata$organism<- factor(metadata$organism, levels = c("Control", "Cin", "Cry", "Plu"))

metadata$flooded <- as.factor(metadata$flooded)
metadata$flooded <- factor(metadata$flooded, levels = c("TRUE", "FALSE"))

metadata$experiment <- as.factor(metadata$experiment)
metadata$experiment<- factor(metadata$experiment, levels = c("1", "2"))

abundance$is_low_abund <- rowSums(abundance[, metadata$sample_name]) < 5
obj2 <- parse_tax_data(abundance, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj2$data) <- c('abund', 'score')
obj2 <- transmute_obs(obj2, 'score', sequence = sequence[input_index], boot = boot, rank = rank)
print(obj2)
## <Taxmap>
##   1575 taxa: aab. Eukaryota, aac. Fungi ... cip. Fungi_sp
##   1575 edges: NA->aab, aab->aac, aab->aad ... cin->cio, cio->cip
##   2 data sets:
##     abund:
##       # A tibble: 2,707 × 180
##         taxon_id sequence   dada2_tax is_inoculum inoculum    S1   S10
##         <chr>    <chr>      <chr>     <lgl>       <chr>    <int> <int>
##       1 bga      AAGTCGTAA… Eukaryot… FALSE       <NA>      4670  3565
##       2 bgb      AAGTCGTAA… Eukaryot… FALSE       <NA>       493   668
##       3 bgc      AAAAAGTCG… Eukaryot… FALSE       <NA>      7603     0
##       # ℹ 2,704 more rows
##       # ℹ 173 more variables: S100 <int>, S101 <int>, S102 <int>,
##       #   S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
##       #   S108 <int>, S109 <int>, …
##     score:
##       # A tibble: 23,852 × 4
##         taxon_id sequence                                  boot  rank 
##         <chr>    <chr>                                     <chr> <chr>
##       1 aab      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA… 100   Doma…
##       2 aac      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA… 100   King…
##       3 aae      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA… 73    Phyl…
##       # ℹ 23,849 more rows
##   0 functions:
obj2$data$tax_abund <- calc_taxon_abund(obj2, data = "abund", cols = metadata$sample_name)
## Summing per-taxon counts from 162 columns for 1575 taxa
obj2$data$abund_prop <- calc_obs_props(obj2, data = "abund", cols = metadata$sample_name, groups = rep("tax_prop", nrow(metadata)))
## Calculating proportions from counts for 162 columns in 1 groups for 2707 observations.
obj2$data$tax_prop <- calc_taxon_abund(obj2, data = "abund_prop", cols = "tax_prop")
## Summing per-taxon counts from 1 columns for 1575 taxa
obj2$data$abund_prop <- NULL

obj_subset <- obj2 %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
min_bootstrap <- 0

obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by inoculum concentration

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by whether plots were flooded or not

## [1] 7.253048e-18 9.976959e-01
## [1] -24.788261   5.134441

Let’s make differential abundance plots for our combined dataset-first let’s look at diff. abundance of taxa by inoculum source

Let’s try to facet these plots.

Let’s finally make an exploratory heat tree to look at community composition across all samples, just retaining ASVs present at least 10 times

Overall community plot

obj_subset <- obj2 %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
min_bootstrap <- 60

obj_subset$data$score$boot <- as.numeric(obj_subset$data$score$boot)
max_boot <- obj_subset$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_subset <- filter_taxa(obj_subset, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

obj_subset$data$asv_prop <- calc_obs_props(obj_subset, 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 162 columns for 2707 observations.
obj_subset$data$tax_abund <- calc_taxon_abund(obj_subset, 'abund', cols = metadata$sample_name)
## Summing per-taxon counts from 162 columns for 1125 taxa
obj_subset$data$tax_prop <- calc_taxon_abund(obj_subset, 'asv_prop', cols = metadata$sample_name)
## Summing per-taxon counts from 162 columns for 1125 taxa
obj_subset$data$tax_data <- calc_n_samples(obj_subset, 'tax_prop', cols = metadata$sample_name[metadata$sample_type == 'Sample'])
## Calculating number of samples with a value greater than 0 for 162 columns for 1125 observations
obj_subset$data$tax_data$mean_prop <- rowMeans(obj_subset$data$tax_prop[, metadata$sample_name])

# Calculate the relative standard deviation for each taxon as a measure of how consistently it was found.
rsd <- function(x, na.rm = FALSE) {sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)}
obj_subset$data$tax_data$rel_stand_dev <- map_dbl(1:nrow(obj_subset$data$tax_abund), function(i) {
  rsd(unlist(obj_subset$data$tax_abund[i, metadata$sample_name]), na.rm = TRUE)
})
obj_subset
## <Taxmap>
##   1125 taxa: aab. Eukaryota, aac. Fungi ... cip. Fungi_sp
##   1125 edges: NA->aab, aab->aac, aab->aad ... cin->cio, cio->cip
##   6 data sets:
##     abund:
##       # A tibble: 2,707 × 180
##         taxon_id sequence   dada2_tax is_inoculum inoculum    S1   S10
##         <chr>    <chr>      <chr>     <lgl>       <chr>    <int> <int>
##       1 acm      AAGTCGTAA… Eukaryot… FALSE       <NA>      4670  3565
##       2 bgb      AAGTCGTAA… Eukaryot… FALSE       <NA>       493   668
##       3 bgc      AAAAAGTCG… Eukaryot… FALSE       <NA>      7603     0
##       # ℹ 2,704 more rows
##       # ℹ 173 more variables: S100 <int>, S101 <int>, S102 <int>,
##       #   S103 <int>, S104 <int>, S105 <int>, S106 <int>, S107 <int>,
##       #   S108 <int>, S109 <int>, …
##     score:
##       # A tibble: 22,551 × 4
##         taxon_id sequence                                   boot rank 
##         <chr>    <chr>                                     <dbl> <chr>
##       1 aab      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA…   100 Doma…
##       2 aac      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA…   100 King…
##       3 aae      AAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGA…    73 Phyl…
##       # ℹ 22,548 more rows
##     tax_abund:
##       # A tibble: 1,125 × 163
##         taxon_id    S1   S10   S11   S12   S13   S14   S15   S16   S17
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 aab      56860 35128 28797 58035 46889 68269 16554 24056 49958
##       2 aac      27729 12225 22562 10819 27587 10124  7579 22610 23009
##       3 aad      29131 22903  6235 47216 19302 58145  8975  1446 26949
##       # ℹ 1,122 more rows
##       # ℹ 153 more variables: S18 <dbl>, S2 <dbl>, S20 <dbl>,
##       #   S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>,
##       #   S26 <dbl>, S27 <dbl>, …
##     tax_prop:
##       # A tibble: 1,125 × 163
##         taxon_id    S1   S10   S11   S12   S13   S14   S15    S16
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##       1 aab      1     1     1     1     1.00  1     1     1     
##       2 aac      0.488 0.348 0.783 0.186 0.588 0.148 0.458 0.940 
##       3 aad      0.512 0.652 0.217 0.814 0.412 0.852 0.542 0.0601
##       # ℹ 1,122 more rows
##       # ℹ 154 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>,
##       #   S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>,
##       #   S25 <dbl>, S26 <dbl>, …
##     asv_prop:
##       # A tibble: 2,707 × 163
##         taxon_id      S1    S10    S11     S12    S13      S14    S15
##         <chr>      <dbl>  <dbl>  <dbl>   <dbl>  <dbl>    <dbl>  <dbl>
##       1 acm      0.0821  0.101  0.0414 0.0552  0      0.0366   0.168 
##       2 bgb      0.00867 0.0190 0.0643 0.00314 0.0220 0.00554  0.0162
##       3 bgc      0.134   0      0      0       0      0.000293 0     
##       # ℹ 2,704 more rows
##       # ℹ 155 more variables: S16 <dbl>, S17 <dbl>, S18 <dbl>,
##       #   S2 <dbl>, S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>,
##       #   S24 <dbl>, S25 <dbl>, …
##     tax_data:
##       # A tibble: 1,125 × 4
##         taxon_id n_samples mean_prop rel_stand_dev
##         <chr>        <int>     <dbl>         <dbl>
##       1 aab            162     1             0.443
##       2 aac            162     0.416         0.652
##       3 aad            162     0.584         0.711
##       # ℹ 1,122 more rows
##   0 functions:
set.seed(1000)
obj_subset %>%
  filter_taxa(! is_stem) %>%
  filter_taxa(n_samples >= 10, supertaxa = TRUE, reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "_sp$"), reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "incertae_sedis", ignore.case = TRUE), reassign_obs = FALSE) %>%
  remove_redundant_names() %>%
  heat_tree(node_size = mean_prop,
            edge_size = n_samples,
            node_color = ifelse(is.na(rel_stand_dev), 0, rel_stand_dev),
            #node_color_range = c("#aaaaaa", "#8da0cb", "#66c2a5", "#a6d854", "#fc8d62", "red"),
            node_label = taxon_names,
            node_size_range = c(0.008, 0.025),
            node_label_size_range = c(0.012, 0.018),
            edge_label_size_range = c(0.010, 0.013),
            node_size_interval = c(0, 1),
            edge_size_range = c(0.001, 0.008),
            #layout = "da", initial_layout = "re",
            node_color_axis_label = "Relative standard deviation",
            node_size_axis_label = "Mean proportion of reads",
            edge_size_axis_label = "Number of samples",
            node_color_digits = 2,
            node_size_digits = 2,
            edge_color_digits = 2,
            edge_size_digits = 2,
            #aspect_ratio = 1.618,
            output_file = file.path('demulticoder/figures', '/heattree_mostabund_taxa.pdf'))

Reload ASV matrices and metadata for final checks when examining new db to the old version before updates

asv_matrix_rps10<-read.csv("demulticoder/data/final_asv_abundance_matrix_rps10.csv")
asv_matrix_rps10$dada2_tax <- asv_matrix_rps10$dada2_tax <- gsub("Stramenopila", "Eukaryota--100--Domain;Stramenopila", asv_matrix_rps10$dada2_tax)
asv_matrix_rps10 <- asv_matrix_rps10[, -1]
colnames(asv_matrix_rps10)[3:ncol(asv_matrix_rps10)] <- gsub("_rps10$", "", colnames(asv_matrix_rps10)[3:ncol(asv_matrix_rps10)])

asv_matrix_rps10_oldb<-read.csv("demulticoder/data_olddb//final_asv_abundance_matrix_rps10.csv")
asv_matrix_rps10_oldb$dada2_tax <- asv_matrix_rps10_oldb$dada2_tax <- gsub("Stramenopila", "Eukaryota--100--Domain;Stramenopila", asv_matrix_rps10_oldb$dada2_tax)
asv_matrix_rps10_oldb <- asv_matrix_rps10_oldb[, -1]
colnames(asv_matrix_rps10_oldb)[3:ncol(asv_matrix_rps10_oldb)] <- gsub("_rps10$", "", colnames(asv_matrix_rps10_oldb)[3:ncol(asv_matrix_rps10_oldb)])

metadata_path <- file.path("demulticoder/data/metadata.csv")
metadata <- read_csv(metadata_path)
## Rows: 174 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): sample_name, well, organism, sample_type
## dbl (3): plate, path_conc, experiment
## lgl (2): flooded, is_ambiguous
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(metadata)
## # A tibble: 174 × 9
##    sample_name plate well  organism flooded path_conc experiment sample_type
##    <chr>       <dbl> <chr> <chr>    <lgl>       <dbl>      <dbl> <chr>      
##  1 S1              1 A01   Cry      TRUE          100          1 Sample     
##  2 S10             1 B02   Cry      TRUE            1          1 Sample     
##  3 S11             1 C02   Plu      FALSE           1          1 Sample     
##  4 S12             1 D02   Plu      TRUE            1          1 Sample     
##  5 S13             1 E02   Control  TRUE            0          1 Sample     
##  6 S14             1 F02   Cin      TRUE            1          1 Sample     
##  7 S15             1 H02   Cin      FALSE         100          1 Sample     
##  8 S16             1 A03   Cry      FALSE         100          1 Sample     
##  9 S17             1 B03   Cry      TRUE          100          1 Sample     
## 10 S18             1 C03   Control  TRUE            0          1 Sample     
## # ℹ 164 more rows
## # ℹ 1 more variable: is_ambiguous <lgl>

Convert ASV matrices to taxmap objects-updated db

obj_newdb <- parse_tax_data(asv_matrix_rps10, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj_newdb$data) <- c('abund', 'score')
obj_newdb <- transmute_obs(obj_newdb, 'score', sequence = sequence[input_index], boot = boot, rank = rank)


# For diversity calculations, we'll use proportions of read depth
# We'll set any proportion to 0 that is less than the inverse of the read count of the non-control sample with the fewest reads.
# This should account for unequal sample read depth without the randomness of rarefaction.

metadata$raw_count <- colSums(obj_newdb$data$abund[, metadata$sample_name])
lowest_count <- min(metadata$raw_count[! is.na(metadata$organism)])
lowest_count
## [1] 251
obj_newdb$data$prop <- calc_obs_props(obj_newdb, data = 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 174 columns for 347 observations.
obj_newdb$data$prop <- zero_low_counts(obj_newdb, data = 'prop', min_count = 1 / lowest_count, cols = metadata$sample_name)
## Zeroing 507 of 60378 counts less than 0.00398406374501992.
obj_newdb$data$prop
## # A tibble: 347 × 175
##    taxon_id      S1    S10   S11    S12    S13    S14   S15    S16    S17    S18
##    <chr>      <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
##  1 bm       0.409   0.0490 0.912 0      0.589  0      0     0.0384 0.704  0.660 
##  2 bn       0       0      0     0      0      0      0.716 0      0      0     
##  3 bo       0       0      0     0.0101 0.0194 0.550  0     0      0      0.0126
##  4 bp       0       0      0     0      0      0      0     0      0      0     
##  5 bq       0       0      0     0      0      0      0     0      0      0     
##  6 br       0.568   0.131  0     0.0530 0.381  0.0148 0     0      0.0917 0.0660
##  7 br       0       0      0     0      0      0.0247 0     0      0      0     
##  8 bs       0       0.818  0     0.774  0      0.391  0.265 0      0      0     
##  9 bp       0       0      0     0.0646 0      0.0156 0     0      0      0     
## 10 bt       0.00624 0      0     0.0416 0      0      0     0.935  0.182  0.209 
## # ℹ 337 more rows
## # ℹ 164 more variables: S2 <dbl>, S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>,
## #   S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>, S28 <dbl>, S29 <dbl>, S3 <dbl>,
## #   S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>, S35 <dbl>, S36 <dbl>,
## #   S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>, S41 <dbl>, S42 <dbl>,
## #   S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>, S48 <dbl>, S49 <dbl>, S5 <dbl>,
## #   S50 <dbl>, S51 <dbl>, S52 <dbl>, S53 <dbl>, S54 <dbl>, S55 <dbl>, …
obj_newdb$data$prop[metadata$sample_name] <- map(metadata$sample_name, function(id) {
  out <- obj_newdb$data$prop[[id]]
  out[is.na(out) | is.nan(out)] <- 0
  out
})

obj_newdb$data$prop
## # A tibble: 347 × 175
##    taxon_id      S1    S10   S11    S12    S13    S14   S15    S16    S17    S18
##    <chr>      <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
##  1 bm       0.409   0.0490 0.912 0      0.589  0      0     0.0384 0.704  0.660 
##  2 bn       0       0      0     0      0      0      0.716 0      0      0     
##  3 bo       0       0      0     0.0101 0.0194 0.550  0     0      0      0.0126
##  4 bp       0       0      0     0      0      0      0     0      0      0     
##  5 bq       0       0      0     0      0      0      0     0      0      0     
##  6 br       0.568   0.131  0     0.0530 0.381  0.0148 0     0      0.0917 0.0660
##  7 br       0       0      0     0      0      0.0247 0     0      0      0     
##  8 bs       0       0.818  0     0.774  0      0.391  0.265 0      0      0     
##  9 bp       0       0      0     0.0646 0      0.0156 0     0      0      0     
## 10 bt       0.00624 0      0     0.0416 0      0      0     0.935  0.182  0.209 
## # ℹ 337 more rows
## # ℹ 164 more variables: S2 <dbl>, S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>,
## #   S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>, S28 <dbl>, S29 <dbl>, S3 <dbl>,
## #   S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>, S35 <dbl>, S36 <dbl>,
## #   S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>, S41 <dbl>, S42 <dbl>,
## #   S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>, S48 <dbl>, S49 <dbl>, S5 <dbl>,
## #   S50 <dbl>, S51 <dbl>, S52 <dbl>, S53 <dbl>, S54 <dbl>, S55 <dbl>, …

Convert ASV matrices to taxmap objects-old db

obj_olddb <- parse_tax_data(asv_matrix_rps10_oldb, class_cols = 'dada2_tax', class_sep = ';',
                      class_regex = '^(.+)--(.+)--(.+)$',
                      class_key = c(taxon = 'taxon_name', boot = 'info', rank = 'taxon_rank'))
names(obj_olddb$data) <- c('abund', 'score')
obj_olddb <- transmute_obs(obj_olddb, 'score', sequence = sequence[input_index], boot = boot, rank = rank)


# For diversity calculations, we'll use proportions of read depth
# We'll set any proportion to 0 that is less than the inverse of the read count of the non-control sample with the fewest reads.
# This should account for unequal sample read depth without the randomness of rarefaction.

metadata$raw_count <- colSums(obj_olddb$data$abund[, metadata$sample_name])
lowest_count <- min(metadata$raw_count[! is.na(metadata$organism)])
lowest_count
## [1] 251
obj_olddb$data$prop <- calc_obs_props(obj_olddb, data = 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 174 columns for 347 observations.
obj_olddb$data$prop <- zero_low_counts(obj_olddb, data = 'prop', min_count = 1 / lowest_count, cols = metadata$sample_name)
## Zeroing 507 of 60378 counts less than 0.00398406374501992.
obj_olddb$data$prop
## # A tibble: 347 × 175
##    taxon_id      S1    S10   S11    S12    S13    S14   S15    S16    S17    S18
##    <chr>      <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
##  1 bl       0.409   0.0490 0.912 0      0.589  0      0     0.0384 0.704  0.660 
##  2 bm       0       0      0     0      0      0      0.716 0      0      0     
##  3 bn       0       0      0     0.0101 0.0194 0.550  0     0      0      0.0126
##  4 bl       0       0      0     0      0      0      0     0      0      0     
##  5 bo       0       0      0     0      0      0      0     0      0      0     
##  6 bp       0.568   0.131  0     0.0530 0.381  0.0148 0     0      0.0917 0.0660
##  7 bq       0       0      0     0      0      0.0247 0     0      0      0     
##  8 br       0       0.818  0     0.774  0      0.391  0.265 0      0      0     
##  9 bl       0       0      0     0.0646 0      0.0156 0     0      0      0     
## 10 bs       0.00624 0      0     0.0416 0      0      0     0.935  0.182  0.209 
## # ℹ 337 more rows
## # ℹ 164 more variables: S2 <dbl>, S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>,
## #   S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>, S28 <dbl>, S29 <dbl>, S3 <dbl>,
## #   S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>, S35 <dbl>, S36 <dbl>,
## #   S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>, S41 <dbl>, S42 <dbl>,
## #   S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>, S48 <dbl>, S49 <dbl>, S5 <dbl>,
## #   S50 <dbl>, S51 <dbl>, S52 <dbl>, S53 <dbl>, S54 <dbl>, S55 <dbl>, …
obj_olddb$data$prop[metadata$sample_name] <- map(metadata$sample_name, function(id) {
  out <- obj_olddb$data$prop[[id]]
  out[is.na(out) | is.nan(out)] <- 0
  out
})

obj_olddb$data$prop
## # A tibble: 347 × 175
##    taxon_id      S1    S10   S11    S12    S13    S14   S15    S16    S17    S18
##    <chr>      <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
##  1 bl       0.409   0.0490 0.912 0      0.589  0      0     0.0384 0.704  0.660 
##  2 bm       0       0      0     0      0      0      0.716 0      0      0     
##  3 bn       0       0      0     0.0101 0.0194 0.550  0     0      0      0.0126
##  4 bl       0       0      0     0      0      0      0     0      0      0     
##  5 bo       0       0      0     0      0      0      0     0      0      0     
##  6 bp       0.568   0.131  0     0.0530 0.381  0.0148 0     0      0.0917 0.0660
##  7 bq       0       0      0     0      0      0.0247 0     0      0      0     
##  8 br       0       0.818  0     0.774  0      0.391  0.265 0      0      0     
##  9 bl       0       0      0     0.0646 0      0.0156 0     0      0      0     
## 10 bs       0.00624 0      0     0.0416 0      0      0     0.935  0.182  0.209 
## # ℹ 337 more rows
## # ℹ 164 more variables: S2 <dbl>, S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>,
## #   S24 <dbl>, S25 <dbl>, S26 <dbl>, S27 <dbl>, S28 <dbl>, S29 <dbl>, S3 <dbl>,
## #   S31 <dbl>, S32 <dbl>, S33 <dbl>, S34 <dbl>, S35 <dbl>, S36 <dbl>,
## #   S37 <dbl>, S38 <dbl>, S39 <dbl>, S4 <dbl>, S40 <dbl>, S41 <dbl>, S42 <dbl>,
## #   S43 <dbl>, S45 <dbl>, S46 <dbl>, S47 <dbl>, S48 <dbl>, S49 <dbl>, S5 <dbl>,
## #   S50 <dbl>, S51 <dbl>, S52 <dbl>, S53 <dbl>, S54 <dbl>, S55 <dbl>, …

Community composition plot-new db-showcase differences in bootstrap support values

min_bootstrap <- 0
obj_newdb$data$score$boot <- as.numeric(obj_newdb$data$score$boot)
max_boot <- obj_newdb$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_newdb <- filter_taxa(obj_newdb, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

obj_newdb$data$asv_prop <- calc_obs_props(obj_newdb, 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 174 columns for 347 observations.
obj_newdb$data$tax_abund <- calc_taxon_abund(obj_newdb, 'abund', cols = metadata$sample_name)
## Summing per-taxon counts from 174 columns for 143 taxa
obj_newdb$data$tax_prop <- calc_taxon_abund(obj_newdb, 'asv_prop', cols = metadata$sample_name)
## Summing per-taxon counts from 174 columns for 143 taxa
obj_newdb$data$tax_data <- calc_n_samples(obj_newdb, 'tax_prop', cols = metadata$sample_name[metadata$sample_type == 'Sample'])
## Calculating number of samples with a value greater than 0 for 162 columns for 143 observations
obj_newdb$data$tax_data$mean_prop <- rowMeans(obj_newdb$data$tax_prop[, metadata$sample_name])

obj_newdb %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
## <Taxmap>
##   143 taxa: ab. Eukaryota ... fn. Phytopythium_ostracodes
##   143 edges: NA->ab, ab->ac, ac->ad ... av->fl, ba->fm, aw->fn
##   7 data sets:
##     abund:
##       # A tibble: 347 × 177
##         taxon_id sequence      dada2_tax    S1   S10  S100  S101  S102
##         <chr>    <chr>         <chr>     <int> <int> <int> <int> <int>
##       1 bm       GAAAATCTTTGT… Eukaryot… 11976  1123     0     0  1125
##       2 bn       GAAAATCTTTGT… Eukaryot…     0     0     0     0    17
##       3 bo       GAAAATCTTTGT… Eukaryot…     0     0 15876 11386  9976
##       # ℹ 344 more rows
##       # ℹ 169 more variables: S103 <int>, S104 <int>, S105 <int>,
##       #   S106 <int>, S107 <int>, S108 <int>, S109 <int>, S11 <int>,
##       #   S110 <int>, S111 <int>, …
##     score:
##       # A tibble: 2,776 × 4
##         taxon_id sequence                                   boot rank 
##         <chr>    <chr>                                     <dbl> <chr>
##       1 ab       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 Doma…
##       2 ac       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 King…
##       3 ad       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 Phyl…
##       # ℹ 2,773 more rows
##     prop:
##       # A tibble: 347 × 175
##         taxon_id    S1    S10   S11    S12    S13   S14   S15    S16
##         <chr>    <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
##       1 bm       0.409 0.0490 0.912 0      0.589  0     0     0.0384
##       2 bn       0     0      0     0      0      0     0.716 0     
##       3 bo       0     0      0     0.0101 0.0194 0.550 0     0     
##       # ℹ 344 more rows
##       # ℹ 166 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>,
##       #   S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>,
##       #   S25 <dbl>, S26 <dbl>, …
##     asv_prop:
##       # A tibble: 347 × 175
##         taxon_id    S1    S10   S11    S12    S13   S14   S15    S16
##         <chr>    <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
##       1 bm       0.409 0.0490 0.912 0      0.589  0     0     0.0384
##       2 bn       0     0      0     0      0      0     0.716 0     
##       3 bo       0     0      0     0.0101 0.0194 0.550 0     0     
##       # ℹ 344 more rows
##       # ℹ 166 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>,
##       #   S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>,
##       #   S25 <dbl>, S26 <dbl>, …
##     tax_abund:
##       # A tibble: 143 × 175
##         taxon_id    S1   S10   S11   S12   S13   S14   S15   S16   S17
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       2 ac       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       3 ad       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       # ℹ 140 more rows
##       # ℹ 165 more variables: S18 <dbl>, S2 <dbl>, S20 <dbl>,
##       #   S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>,
##       #   S26 <dbl>, S27 <dbl>, …
##     tax_prop:
##       # A tibble: 143 × 175
##         taxon_id    S1   S10   S11   S12   S13   S14   S15   S16   S17
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab           1     1     1     1     1     1     1     1     1
##       2 ac           1     1     1     1     1     1     1     1     1
##       3 ad           1     1     1     1     1     1     1     1     1
##       # ℹ 140 more rows
##       # ℹ 165 more variables: S18 <dbl>, S2 <dbl>, S20 <dbl>,
##       #   S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>,
##       #   S26 <dbl>, S27 <dbl>, …
##     And 1 more data sets: tax_data
##   0 functions:
# Calculate the relative standard deviation for each taxon as a measure of how consistently it was found.
rsd <- function(x, na.rm = FALSE) {sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)}
obj_newdb$data$tax_data$rel_stand_dev <- map_dbl(1:nrow(obj_newdb$data$tax_abund), function(i) {
  rsd(unlist(obj_newdb$data$tax_abund[i, metadata$sample_name]), na.rm = TRUE)
})

max_boot <- obj_newdb$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))

# Map the maximum bootstrap support to taxon IDs
max_boot <- setNames(max_boot$max, max_boot$taxon_id)

# Add maximum bootstrap support to tax_data
obj_newdb$data$tax_data$max_boot <- max_boot[match(obj_newdb$data$tax_data$taxon_id, names(max_boot))]

set.seed(10)

obj_newdb %>%
  filter_taxa(! is_stem) %>%
  filter_taxa(n_samples >= 1, supertaxa = TRUE, reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "_sp$"), reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "incertae_sedis", ignore.case = TRUE), reassign_obs = FALSE) %>%
  remove_redundant_names() %>%
  heat_tree(node_size = mean_prop,
            edge_size = n_samples,
            node_color = ifelse(is.na(max_boot), 0, max_boot),
            node_color_range = c("#D2042D","#fc8d62","#a6d854","#66c2a5","lightblue","#b8c0cf"),
            node_label = taxon_names,
            node_size_range = c(0.01, 0.03), 
            node_label_size_range = c(0.014, 0.02),
            edge_label_size_range = c(0.012, 0.015), 
            node_size_interval = c(0, 1),
            edge_size_range = c(0.002, 0.01), 
            layout = "da", initial_layout = "re",
            node_color_axis_label = "Bootstrap support",
            node_size_axis_label = "Mean proportion of reads",
            edge_size_axis_label = "Number of samples",
            node_color_digits = 2,
            node_size_digits = 2,
            edge_color_digits = 2,
            edge_size_digits = 2,
            #aspect_ratio = 1.618,
            output_file = file.path('demulticoder/figures', '/heattree_mostabund_taxa_rps10_newdb_boot0_greater1.pdf'))

Community composition plot-old db-showcase differences in bootstrap support values

### Community composition plot-new db-showcase differences in bootstrap support values
min_bootstrap <- 0
obj_olddb$data$score$boot <- as.numeric(obj_olddb$data$score$boot)
max_boot <- obj_olddb$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))
max_boot <- setNames(max_boot$max, max_boot$taxon_id)
obj_olddb <- filter_taxa(obj_olddb, max_boot[taxon_ids] >= min_bootstrap | taxon_ranks %in% c("ASV", "Reference"), reassign_obs = c(abund = TRUE, score = FALSE))

obj_olddb$data$asv_prop <- calc_obs_props(obj_olddb, 'abund', cols = metadata$sample_name)
## Calculating proportions from counts for 174 columns for 347 observations.
obj_olddb$data$tax_abund <- calc_taxon_abund(obj_olddb, 'abund', cols = metadata$sample_name)
## Summing per-taxon counts from 174 columns for 145 taxa
obj_olddb$data$tax_prop <- calc_taxon_abund(obj_olddb, 'asv_prop', cols = metadata$sample_name)
## Summing per-taxon counts from 174 columns for 145 taxa
obj_olddb$data$tax_data <- calc_n_samples(obj_olddb, 'tax_prop', cols = metadata$sample_name[metadata$sample_type == 'Sample'])
## Calculating number of samples with a value greater than 0 for 162 columns for 145 observations
obj_olddb$data$tax_data$mean_prop <- rowMeans(obj_olddb$data$tax_prop[, metadata$sample_name])

obj_olddb %>%
  filter_taxa(taxon_ranks == "Species", supertaxa = TRUE, reassign_obs = c(tax_abund = FALSE)) 
## <Taxmap>
##   145 taxa: ab. Eukaryota ... fp. Lagena_sp
##   145 edges: NA->ab, ab->ac, ac->ad ... az->fn, at->fo, bk->fp
##   7 data sets:
##     abund:
##       # A tibble: 347 × 177
##         taxon_id sequence      dada2_tax    S1   S10  S100  S101  S102
##         <chr>    <chr>         <chr>     <int> <int> <int> <int> <int>
##       1 bl       GAAAATCTTTGT… Eukaryot… 11976  1123     0     0  1125
##       2 bm       GAAAATCTTTGT… Eukaryot…     0     0     0     0    17
##       3 bn       GAAAATCTTTGT… Eukaryot…     0     0 15876 11386  9976
##       # ℹ 344 more rows
##       # ℹ 169 more variables: S103 <int>, S104 <int>, S105 <int>,
##       #   S106 <int>, S107 <int>, S108 <int>, S109 <int>, S11 <int>,
##       #   S110 <int>, S111 <int>, …
##     score:
##       # A tibble: 2,776 × 4
##         taxon_id sequence                                   boot rank 
##         <chr>    <chr>                                     <dbl> <chr>
##       1 ab       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 Doma…
##       2 ac       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 King…
##       3 ad       GAAAATCTTTGTGTCGGTGGTTCAAGTCCGCCTCCAAACA…   100 Phyl…
##       # ℹ 2,773 more rows
##     prop:
##       # A tibble: 347 × 175
##         taxon_id    S1    S10   S11    S12    S13   S14   S15    S16
##         <chr>    <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
##       1 bl       0.409 0.0490 0.912 0      0.589  0     0     0.0384
##       2 bm       0     0      0     0      0      0     0.716 0     
##       3 bn       0     0      0     0.0101 0.0194 0.550 0     0     
##       # ℹ 344 more rows
##       # ℹ 166 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>,
##       #   S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>,
##       #   S25 <dbl>, S26 <dbl>, …
##     asv_prop:
##       # A tibble: 347 × 175
##         taxon_id    S1    S10   S11    S12    S13   S14   S15    S16
##         <chr>    <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
##       1 bl       0.409 0.0490 0.912 0      0.589  0     0     0.0384
##       2 bm       0     0      0     0      0      0     0.716 0     
##       3 bn       0     0      0     0.0101 0.0194 0.550 0     0     
##       # ℹ 344 more rows
##       # ℹ 166 more variables: S17 <dbl>, S18 <dbl>, S2 <dbl>,
##       #   S20 <dbl>, S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>,
##       #   S25 <dbl>, S26 <dbl>, …
##     tax_abund:
##       # A tibble: 145 × 175
##         taxon_id    S1   S10   S11   S12   S13   S14   S15   S16   S17
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       2 ac       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       3 ad       29314 22903  6235 49267 19302 58145 33859 27995 32935
##       # ℹ 142 more rows
##       # ℹ 165 more variables: S18 <dbl>, S2 <dbl>, S20 <dbl>,
##       #   S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>,
##       #   S26 <dbl>, S27 <dbl>, …
##     tax_prop:
##       # A tibble: 145 × 175
##         taxon_id    S1   S10   S11   S12   S13   S14   S15   S16   S17
##         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##       1 ab           1     1     1     1     1     1     1     1     1
##       2 ac           1     1     1     1     1     1     1     1     1
##       3 ad           1     1     1     1     1     1     1     1     1
##       # ℹ 142 more rows
##       # ℹ 165 more variables: S18 <dbl>, S2 <dbl>, S20 <dbl>,
##       #   S21 <dbl>, S22 <dbl>, S23 <dbl>, S24 <dbl>, S25 <dbl>,
##       #   S26 <dbl>, S27 <dbl>, …
##     And 1 more data sets: tax_data
##   0 functions:
# Calculate the relative standard deviation for each taxon as a measure of how consistently it was found.
rsd <- function(x, na.rm = FALSE) {sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)}
obj_olddb$data$tax_data$rel_stand_dev <- map_dbl(1:nrow(obj_olddb$data$tax_abund), function(i) {
  rsd(unlist(obj_olddb$data$tax_abund[i, metadata$sample_name]), na.rm = TRUE)
})

max_boot <- obj_olddb$data$score %>%
  group_by(taxon_id) %>%
  summarise(max = max(boot))

# Map the maximum bootstrap support to taxon IDs
max_boot <- setNames(max_boot$max, max_boot$taxon_id)

# Add maximum bootstrap support to tax_data
obj_olddb$data$tax_data$max_boot <- max_boot[match(obj_olddb$data$tax_data$taxon_id, names(max_boot))]


set.seed(10)

obj_olddb %>%
  filter_taxa(! is_stem) %>%
  filter_taxa(n_samples >= 1, supertaxa = TRUE, reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "_sp$"), reassign_obs = FALSE) %>%
  filter_taxa(! grepl(x = taxon_names, "incertae_sedis", ignore.case = TRUE), reassign_obs = FALSE) %>%
  remove_redundant_names() %>%
  heat_tree(node_size = mean_prop,
            edge_size = n_samples,
            node_color = ifelse(is.na(max_boot), 0, max_boot),
            node_color_range = c("#D2042D","#fc8d62","#a6d854","#66c2a5","lightblue","#b8c0cf"),
            node_label = taxon_names,
            node_size_range = c(0.01, 0.03), 
            node_label_size_range = c(0.014, 0.02),
            edge_label_size_range = c(0.012, 0.015), 
            node_size_interval = c(0, 1),
            edge_size_range = c(0.002, 0.01), 
            layout = "da", initial_layout = "re",
            node_color_axis_label = "Bootstrap support",
            node_size_axis_label = "Mean proportion of reads",
            edge_size_axis_label = "Number of samples",
            node_color_digits = 2,
            node_size_digits = 2,
            edge_color_digits = 2,
            edge_size_digits = 2,
            #aspect_ratio = 1.618,
            output_file = file.path('demulticoder/figures', '/heattree_mostabund_taxa_rps10_olddb_boot0_greater1.pdf'))

Let’s facet the two plots so we have a side-by-side comparison of the two databases.

Summary stats comparing db

## Proportion of times new db value is higher than old db value: 0.4092219
## Proportion of times new db value is less than old db value: 0.4899135
## Proportion of times new db value is equal to old db value: 0.1008646
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31)
##  os       macOS Ventura 13.2.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/Los_Angeles
##  date     2025-01-08
##  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-5      2016-07-21 [1] CRAN (R 4.3.0)
##  ade4                   1.7-22     2023-02-06 [1] CRAN (R 4.3.0)
##  agricolae              1.3-7      2023-10-22 [1] CRAN (R 4.3.1)
##  AlgDesign              1.2.1      2022-05-25 [1] CRAN (R 4.3.0)
##  ape                    5.8        2024-04-11 [1] CRAN (R 4.3.1)
##  askpass                1.2.0      2023-09-03 [1] CRAN (R 4.3.0)
##  backports              1.5.0      2024-05-23 [1] CRAN (R 4.3.3)
##  Biobase                2.62.0     2023-10-26 [1] Bioconductor
##  BiocGenerics         * 0.48.1     2023-11-02 [1] Bioconductor
##  BiocParallel           1.36.0     2023-10-26 [1] Bioconductor
##  biomformat             1.30.0     2023-10-26 [1] Bioconductor
##  Biostrings           * 2.70.3     2024-04-03 [1] bioc_xgit (@c213e35)
##  bit                    4.5.0.1    2024-12-03 [1] CRAN (R 4.3.3)
##  bit64                  4.5.2      2024-09-22 [1] CRAN (R 4.3.3)
##  bitops                 1.0-8      2024-07-29 [1] CRAN (R 4.3.3)
##  broom                  1.0.6      2024-05-17 [1] CRAN (R 4.3.3)
##  bslib                  0.8.0      2024-07-29 [1] CRAN (R 4.3.3)
##  cachem                 1.1.0      2024-05-16 [1] CRAN (R 4.3.2)
##  car                    3.1-2      2023-03-30 [1] CRAN (R 4.3.0)
##  carData                3.0-5      2022-01-06 [1] CRAN (R 4.3.0)
##  cli                    3.6.3      2024-06-21 [1] CRAN (R 4.3.2)
##  cluster                2.1.6      2023-12-01 [1] CRAN (R 4.3.1)
##  codetools              0.2-20     2024-03-31 [1] CRAN (R 4.3.1)
##  colorspace             2.1-1      2024-07-26 [1] CRAN (R 4.3.3)
##  cowplot              * 1.1.3      2024-01-22 [1] CRAN (R 4.3.1)
##  crayon                 1.5.3      2024-06-20 [1] CRAN (R 4.3.3)
##  data.table           * 1.15.4     2024-03-30 [1] CRAN (R 4.3.1)
##  decontam             * 1.22.0     2023-10-26 [1] Bioconductor
##  DelayedArray           0.28.0     2023-11-06 [1] Bioconductor
##  DESeq2                 1.42.1     2024-03-09 [1] Bioconductor 3.18 (R 4.3.3)
##  digest                 0.6.37     2024-08-19 [1] CRAN (R 4.3.3)
##  dplyr                * 1.1.4      2023-11-17 [1] CRAN (R 4.3.1)
##  evaluate               1.0.1      2024-10-10 [1] CRAN (R 4.3.3)
##  farver                 2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
##  fastmap                1.2.0      2024-05-15 [1] CRAN (R 4.3.2)
##  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
##  foreach                1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
##  furrr                * 0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
##  future               * 1.34.0     2024-07-29 [1] CRAN (R 4.3.3)
##  generics               0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
##  GenomeInfoDb         * 1.38.8     2024-03-16 [1] Bioconductor 3.18 (R 4.3.3)
##  GenomeInfoDbData       1.2.11     2023-11-14 [1] Bioconductor
##  GenomicRanges          1.54.1     2023-10-30 [1] Bioconductor
##  ggfittext              0.10.2     2024-02-01 [1] CRAN (R 4.3.1)
##  ggplot2              * 3.5.1      2024-04-23 [1] CRAN (R 4.3.1)
##  ggpubr                 0.6.0      2023-02-10 [1] CRAN (R 4.3.0)
##  ggsignif               0.6.4      2022-10-13 [1] CRAN (R 4.3.0)
##  globals                0.16.3     2024-03-08 [1] CRAN (R 4.3.1)
##  glue                   1.8.0      2024-09-30 [1] CRAN (R 4.3.3)
##  gridExtra            * 2.3        2017-09-09 [1] CRAN (R 4.3.0)
##  gtable                 0.3.6      2024-10-25 [1] CRAN (R 4.3.3)
##  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
##  htmltools              0.5.8.1    2024-04-04 [1] CRAN (R 4.3.1)
##  igraph                 2.0.3      2024-03-13 [1] CRAN (R 4.3.1)
##  IRanges              * 2.36.0     2023-10-26 [1] Bioconductor
##  iterators              1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
##  jquerylib              0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
##  jsonlite               1.8.8      2023-12-04 [1] CRAN (R 4.3.1)
##  knitr                  1.49       2024-11-08 [1] CRAN (R 4.3.3)
##  labeling               0.4.3      2023-08-29 [1] CRAN (R 4.3.0)
##  lattice              * 0.22-6     2024-03-20 [1] CRAN (R 4.3.1)
##  lazyeval               0.2.2      2019-03-15 [1] CRAN (R 4.3.0)
##  lifecycle              1.0.4      2023-11-07 [1] CRAN (R 4.3.1)
##  listenv                0.9.1      2024-01-29 [1] CRAN (R 4.3.1)
##  locfit                 1.5-9.10   2024-06-24 [1] CRAN (R 4.3.3)
##  magick               * 2.8.4      2024-07-14 [1] CRAN (R 4.3.3)
##  magrittr               2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
##  MASS                   7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.1)
##  Matrix                 1.6-5      2024-01-11 [1] CRAN (R 4.3.1)
##  MatrixGenerics         1.14.0     2023-10-26 [1] Bioconductor
##  matrixStats            1.3.0      2024-04-11 [1] CRAN (R 4.3.1)
##  metacoder            * 0.3.7      2024-02-20 [1] CRAN (R 4.3.1)
##  mgcv                   1.9-1      2023-12-21 [1] CRAN (R 4.3.1)
##  multtest               2.58.0     2023-10-26 [1] Bioconductor
##  munsell                0.5.1      2024-04-01 [1] CRAN (R 4.3.1)
##  nlme                   3.1-165    2024-06-06 [1] CRAN (R 4.3.3)
##  parallelly             1.41.0     2024-12-18 [1] CRAN (R 4.3.3)
##  pdftools             * 3.4.1      2024-09-20 [1] CRAN (R 4.3.3)
##  permute              * 0.9-7      2022-01-27 [1] CRAN (R 4.3.0)
##  phyloseq             * 1.46.0     2024-04-03 [1] bioc_xgit (@7320133)
##  pillar                 1.10.0     2024-12-17 [1] CRAN (R 4.3.3)
##  pkgconfig              2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
##  plyr                   1.8.9      2023-10-02 [1] CRAN (R 4.3.1)
##  purrr                * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
##  qpdf                   1.3.4      2024-10-04 [1] CRAN (R 4.3.3)
##  R6                     2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
##  ragg                   1.3.2      2024-05-15 [1] CRAN (R 4.3.3)
##  Rcpp                   1.0.13     2024-07-17 [1] CRAN (R 4.3.2)
##  RCurl                  1.98-1.16  2024-07-11 [1] CRAN (R 4.3.3)
##  readr                * 2.1.5      2024-01-10 [1] CRAN (R 4.3.1)
##  reshape2               1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
##  rhdf5                  2.46.1     2023-12-02 [1] Bioconductor 3.18 (R 4.3.2)
##  rhdf5filters           1.14.1     2023-11-06 [1] Bioconductor
##  Rhdf5lib               1.24.2     2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
##  rlang                  1.1.4      2024-06-04 [1] CRAN (R 4.3.2)
##  rmarkdown              2.27       2024-05-17 [1] CRAN (R 4.3.3)
##  rstatix                0.7.2      2023-02-01 [1] CRAN (R 4.3.0)
##  rstudioapi             0.16.0     2024-03-24 [1] CRAN (R 4.3.1)
##  S4Arrays               1.2.1      2024-03-05 [1] Bioconductor 3.18 (R 4.3.2)
##  S4Vectors            * 0.40.2     2023-11-25 [1] Bioconductor 3.18 (R 4.3.2)
##  sass                   0.4.9      2024-03-15 [1] CRAN (R 4.3.1)
##  scales                 1.3.0      2023-11-28 [1] CRAN (R 4.3.1)
##  sessioninfo            1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
##  SparseArray            1.2.4      2024-02-10 [1] Bioconductor 3.18 (R 4.3.2)
##  stringi                1.8.4      2024-05-06 [1] CRAN (R 4.3.2)
##  stringr              * 1.5.1      2023-11-14 [1] CRAN (R 4.3.1)
##  SummarizedExperiment   1.32.0     2023-11-06 [1] Bioconductor
##  survival               3.7-0      2024-06-05 [1] CRAN (R 4.3.3)
##  svglite                2.1.3      2023-12-08 [1] CRAN (R 4.3.1)
##  systemfonts            1.1.0      2024-05-15 [1] CRAN (R 4.3.3)
##  textshaping            0.4.0      2024-05-24 [1] CRAN (R 4.3.3)
##  tibble                 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
##  tidyr                * 1.3.1      2024-01-24 [1] CRAN (R 4.3.1)
##  tidyselect             1.2.1      2024-03-11 [1] CRAN (R 4.3.1)
##  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
##  utf8                   1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
##  vctrs                  0.6.5      2023-12-01 [1] CRAN (R 4.3.1)
##  vegan                * 2.6-6.1    2024-05-21 [1] CRAN (R 4.3.3)
##  viridis                0.6.5      2024-01-29 [1] CRAN (R 4.3.1)
##  viridisLite            0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
##  vroom                  1.6.5      2023-12-05 [1] CRAN (R 4.3.1)
##  withr                  3.0.2      2024-10-28 [1] CRAN (R 4.3.3)
##  xfun                   0.49       2024-10-31 [1] CRAN (R 4.3.3)
##  XVector              * 0.42.0     2023-10-26 [1] Bioconductor
##  yaml                   2.3.10     2024-07-26 [1] CRAN (R 4.3.3)
##  zlibbioc               1.48.2     2024-03-19 [1] Bioconductor 3.18 (R 4.3.3)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────